Possibili analisi:
import numpy as np
import pandas as pd
%matplotlib inline
import math
import numpy as np
import scipy.stats as stats
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import seaborn as sns
from datetime import datetime
from pandas import Timestamp
import collections
import os
# Folder holding the CSV exports. Only the 2016 file is loaded below; the
# folder itself is not iterated over here.
dirname = '../csv'

# Read the 2016 tweets, discard the leftover 'Unnamed: 0' column and parse
# the creation timestamps into datetimes.
df = pd.read_csv('../csv/df_uidFiles_2016.csv')
df = df.drop(columns=['Unnamed: 0'])
df['Created_At'] = pd.to_datetime(df['Created_At'])
df.head()

# Preparation for the folium maps built later: one [lat, lon] pair per tweet.
locations = df.loc[:, ['Lat', 'Lon']]
locationlist = locations.to_numpy().tolist()

# Quick overview of the columns and their summary statistics.
df.info()
df.describe()
Il cluster con più dati, e quindi con più utenti che vi hanno twittato, è il cluster 1, con 306 dati sui tweet; già il cluster -1 (il terzo per numero di utenti) conta invece solo 62 utenti, circa 1/5 del primo cluster.
# Number of tweets per screen name.
df['Screen_name'].value_counts()

# Same count keyed by user id; `users` ends up bound to the UserID column.
users = df['UserID']
users.value_counts()
L'utente che ha twittato di più è Colombalejo (User ID = 1372289694), con 22 tweet.
# Add a 'Day' column holding each tweet's day of the month as a string
# (e.g. "22"); the grouping done further down relies on it.
#
# The column is built in a single assignment. The previous row-by-row
# `df['Day'][i] = ...` was a chained assignment: pandas warns about it and,
# with Copy-on-Write enabled, it does not write back into `df` at all.
df['Day'] = [str(created_at.day) for created_at in df['Created_At']]
df
# For every day label, the array of distinct screen names that tweeted.
screen_names_by_day = df.groupby(['Day'])['Screen_name']
unique_users = screen_names_by_day.unique()
unique_users
# Group the tweets by their day-of-month label.
Days = df.groupby(['Day'])

# max_users:         screen name -> tweet count on a day that user topped.
#                    Keyed by user, so a user who tops several days appears
#                    only once here.
# max_users_perDay:  calendar date -> screen name of that day's top poster.
# max_tweets_perDay: calendar date -> tweet count of that day's top poster.
max_users = {}
max_users_perDay = {}
max_tweets_perDay = {}
for _, day in Days:
    users_collection = collections.Counter(day['Screen_name'].tolist())
    # most_common(1) returns the first user that reaches the highest count.
    top_user, top_count = users_collection.most_common(1)[0]
    max_users[top_user] = top_count
    # The first timestamp of the group supplies the calendar date used as key.
    date_key = pd.Timestamp(day['Created_At'].values[0]).date()
    max_users_perDay[date_key] = top_user
    max_tweets_perDay[date_key] = top_count
max_users_perDay
max_users

# Build the summary table from the two per-day dictionaries so that every row
# carries the count belonging to its own day. Taking the counts from
# `max_users` broke as soon as one user topped more than one day: that
# dictionary then had fewer entries than there are days.
df_max_users = pd.DataFrame(
    {
        'Day': list(max_users_perDay.keys()),
        'User': list(max_users_perDay.values()),
        'Number of tweets': [max_tweets_perDay[date_key] for date_key in max_users_perDay],
    }
)
df_max_users = df_max_users.sort_values(by=['Day']).reset_index(drop=True)
df_max_users
import plotly
import pandas as pd
import numpy as np
import seaborn as sns
import plotly.express as px
# Bar chart with one bar per day; bar height and colour both encode the tweet
# count of that day's most active user, and the hover box also shows the user.
import plotly.express as px

# NOTE(review): `data` is not used anywhere in the visible code.
data = px.data.gapminder()

x = df_max_users['Day']
y = df_max_users['Number of tweets']
bar_options = dict(
    color=y,
    hover_data=['User', 'Number of tweets'],
    height=400,
    title="Users with the highest number of tweets per day in 2016",
)
fig = px.bar(df_max_users, x=x, y=y, **bar_options)
fig.show()
import folium
from folium import plugins
from shapely.geometry import Point, Polygon, LineString
# The coordinates are: [(43.7359, 10.4269), (43.6955, 10.3686)]
lat = [43.7359, 43.6955]
lon = [10.4269, 10.3686]
# The map is centred on the mean of the two latitude/longitude values.
lat_mean, lon_mean = np.mean(lat), np.mean(lon)
coords = df['Coords']

m = folium.Map(
    location=[lat_mean, lon_mean],
    tiles='Stamen Toner',
    zoom_start=13.2,
    control_scale=True,
)
# One marker per tweet; the popup shows the screen name found at the same
# row label in df.
for point, location in enumerate(locationlist):
    marker = folium.Marker(location, popup=df['Screen_name'][point])
    marker.add_to(m)
m
# Second map of the same tweets, this time with the markers collected in a
# MarkerCluster layer instead of being added to the map directly.
map2 = folium.Map(
    location=[lat_mean, lon_mean],
    tiles='CartoDB positron',
    zoom_start=13.2,
)
marker_cluster = folium.plugins.MarkerCluster()
marker_cluster.add_to(map2)
for point, location in enumerate(locationlist):
    marker = folium.Marker(location, popup=df['Screen_name'][point])
    marker.add_to(marker_cluster)
map2
# Publish each per-day group as its own module-level DataFrame named
# "day<label>" (for example day22), re-indexed from 0.
module_namespace = globals()
for group in Days.groups:
    per_day_frame = Days.get_group(group).reset_index(drop=True)
    module_namespace[f"day{group}"] = per_day_frame
Abbiamo:
# Trial with day22: clustered map of the tweets contained in `day22` only.
locations_day22 = day22[['Lat', 'Lon']]
locationlist_day22 = locations_day22.values.tolist()
map_day22 = folium.Map(location=[lat_mean, lon_mean], tiles='CartoDB positron', zoom_start=15)
marker_cluster_day22 = folium.plugins.MarkerCluster().add_to(map_day22)
# The popup text must come from day22 itself. Looking the name up in the full
# `df` by position labelled each marker with the user of an unrelated tweet.
for location, screen_name in zip(locationlist_day22, day22['Screen_name']):
    folium.Marker(location, popup=screen_name).add_to(marker_cluster_day22)
map_day22